%% Monte Carlo Simulations - Canen and Song (2020)
%Based on LRR draft version B55, December 20 - 2020

clear
clc
rng(12123712) %set seed.

%Least Favorable Specification? (i.e. moment inequalities close to equalities)
Specification = 1; %1 or 2? (1 is close to moment equalities)

%Linear Model
n = 500; % dimension of total 
R = 500; %number of simulations
B = 999; %bootstrap repetitions (find k_alpha)
alpha1 = 0.005; %Bonferroni
alpha = 0.05; %size of test
kappa_n = 0.03; %kappa_n = 0.05 for n = 200 (kappa_n = 0.03 works well for Specification 2, but not Spec 1).

%Grid
grid_jump = 0.05; 
range_delta1 = 2.75;
range_gamma = 2.5;

%Parameters (Data)
p_eta = 0.5; %prob. of high type.
p_treatment = 0.3; %prob of treatment (~ what we observe in the data)
sigma_e = 1; %variance of error

%parameters (Linear Model)
%parameters fixed across specifications
delta_1 = 1;
gamma = 0.15; %e.g. ~15% return to college

theta = [delta_1;gamma;p_eta];
d = size(theta,1);

%True Parameter of interest:
beta = p_eta*gamma+(1-p_eta)*(log(delta_1 + gamma)-log(delta_1));

%bounds of Intervals
%Depending on spec (how much top coding)
if Specification==1
Z1 = 2.4; %top-coding in ~ 5% of obs.
Z2 = 7; %Under the parameters above, Z2 = 9 guarantees Y<=Z2 at all 1000000 draws.

%Smaller grid
range_delta1 = 1.75;
range_gamma = 2;

else
Z1 = 1.9;  %top-coding in ~10% of obs. (1.5 for 20% of obs)
Z2 = 7;
end

%Grid for theta (has to be positive, make it large enough to search) 
delta1_grid = 0.01:grid_jump:delta_1+range_delta1; %all values must have lower grid above 0 due to log() parametrization.
gamma_grid = 0:grid_jump:gamma+range_gamma;
p_eta_grid = 0.01:grid_jump/2:0.99;
%% Simulation
%drawing of observables and unobservables
X = ones(1,n); %observables
D = (rand(n,1)<p_treatment);  %treatment occurs at around 10% of obs. (e.g. college), independent of X.

e = sigma_e*randn(n,R); %unobservables
eta = (rand(n,1)>p_eta);%unobserved ability: high in 50% of pop.

%drawing of data
%Outcomes
Y_star = repmat((delta_1+gamma*D).*eta+log(delta_1+gamma*D).*(1-eta),1,R) + e; %if eta=1, linear reduced form; if eta = 0, log reduced form
Ztilde1 = Y_star.*(Y_star<=Z1)+Z1.*(Y_star>Z1); %top-coded Ztilde_1
Ztilde2 = Y_star.*(Y_star<=Z1)+Z2.*(Y_star>Z1); %topc coded Ztilde_2

%Indicator functions for D.
D_eq_1 = (D==1);
D_eq_0 = (D==0);

%Matrix of bootstrap draws
X_order_B = randi(n,[n,B]);

%% Inference

%Store Confidence Intervals
CI_store_LF = cell(R,1);
CI_store_RSW = cell(R,1);

CI_LRR_store_LF = cell(R,1);
CI_LRR_store_RSW = cell(R,1);

%% Simulations

parfor r=1:R
r

%Temp variable for parallel running    
T_beta_temp = zeros(size(delta1_grid,2),size(gamma_grid,1),size(p_eta_grid,1));
critval_temp = zeros(size(delta1_grid,2),size(gamma_grid,1),size(p_eta_grid,1));

%Z1t, Z2t are the Ztilde vectors for each simulation.
Z1t_aux = Ztilde1(:,r);    
Z2t_aux = Ztilde2(:,r);

%Check if we are hitting upper bound of grids
grid_alert = zeros(2,1);

%% Inference 
 
%Store sets necessary for LRR computation
CI_temp_LF = [];
CI_temp_RSW = [];

beta_ID_LF = [];
beta_ID_RSW = [];

beta_LRR_LF = [];
beta_LRR_RSW = [];

%Loop across grid points
for l = 1:size(gamma_grid,2)

Qhat_kappa = zeros(size(delta1_grid,2),size(p_eta_grid,2));
Qhat_minuskappa = zeros(size(delta1_grid,2),size(p_eta_grid,2));
Qhat_0 = zeros(size(delta1_grid,2),size(p_eta_grid,2));
sigma_hat2 = cell(size(delta1_grid,2),size(p_eta_grid,2));
G_p_grid_kappa = []; %G_p_grid_kappa is computed for every beta_grid point...
G_p_grid_minuskappa = [];

for j = 1:size(delta1_grid,2)
     for m = 1:size(p_eta_grid,2)

%Moment conditions for those parameters
m1 = (Z1t_aux.*D_eq_1 - (p_eta_grid(m)*(delta1_grid(j)+gamma_grid(l)) + (1-p_eta_grid(m))*log(delta1_grid(j)+gamma_grid(l))).*D_eq_1);
m2 = (Z1t_aux.*D_eq_0 - (p_eta_grid(m)*(delta1_grid(j)) + (1-p_eta_grid(m))*log(delta1_grid(j))).*D_eq_0);
m3 = (p_eta_grid(m)*(delta1_grid(j)+gamma_grid(l)) + (1-p_eta_grid(m))*log(delta1_grid(j)+gamma_grid(l))).*D_eq_1-Z2t_aux.*D_eq_1;
m4 = (p_eta_grid(m)*(delta1_grid(j)) + (1-p_eta_grid(m))*log(delta1_grid(j))).*D_eq_0-Z2t_aux.*D_eq_0;

%Average Moment.
mbar = [mean(m1),mean(m2),mean(m3),mean(m4)]; %mean over n.

%Variance of Moments
sigma_hat2_temp = [sum((m1-mean(m1)).^2)/size(m1,1),sum((m2-mean(m2)).^2)/size(m2,1),sum((m3-mean(m3)).^2)/size(m3,1),sum((m4-mean(m4)).^2)/size(m4,1)];

%Test Statistic
Qhat_kappa(j,m) = sum(max((mbar./sqrt(sigma_hat2_temp))+kappa_n,0));
Qhat_0(j,m) = sum(max((mbar./sqrt(sigma_hat2_temp)),0));
Qhat_minuskappa(j,m) = sum(max((mbar./sqrt(sigma_hat2_temp))-kappa_n,0));

%Test Statistic
T_beta_temp(j,l,m) = sqrt(n)*Qhat_0(j,m);

%Crit value correction
k1 = find_kalpha_20Dec2020(n,mbar,Z1t_aux,Z2t_aux,D,X_order_B,B,sigma_hat2_temp,delta1_grid(j),gamma_grid(l),p_eta_grid(m),alpha1);
 
%Constructing lambda for beta_j, gamma_k
lambda = zeros(1,size(mbar,2));

for q = 1:size(mbar,2)
aux = sqrt(diag(sigma_hat2_temp));        
lambda(q) = min(mbar(q)-k1*aux(q)/sqrt(n),0); 
end
    
%Critical Value in RSW case.
c_1minusalpha = find_crit_RSW_20Dec2020(n,mbar,Z1t_aux,Z2t_aux,D,X_order_B,B,sigma_hat2_temp,delta1_grid(j),gamma_grid(l),p_eta_grid(m),lambda,alpha,alpha1);
critval_temp(j,l,m) = c_1minusalpha;

%Critical Value in Least Favourable case.
c_1minusalpha_LF = find_crit_LF_20Dec2020(n,mbar,Z1t_aux,Z2t_aux,D,X_order_B,B,sigma_hat2_temp,delta1_grid(j),gamma_grid(l),p_eta_grid(m),alpha);

%Confidence Intervals
Conf_temp_RSW = (sqrt(n)*Qhat_0(j,m)<=c_1minusalpha);
Conf_temp_LF = (sqrt(n)*Qhat_0(j,m)<=c_1minusalpha_LF);

%Is this draw in the Identified Set? (For RSW or LF)
if Conf_temp_RSW==1
   CI_temp_RSW = [CI_temp_RSW; delta1_grid(j),gamma_grid(l),p_eta_grid(m)];
   beta_temp = p_eta_grid(m)*gamma_grid(l)+(1-p_eta_grid(m))*mean(log(delta1_grid(j)+gamma_grid(l))-log(delta1_grid(j)));
   beta_ID_RSW = [beta_ID_RSW; beta_temp];
end

if Conf_temp_LF==1
   CI_temp_LF = [CI_temp_LF; delta1_grid(j),gamma_grid(l),p_eta_grid(m)];
   beta_temp = p_eta_grid(m)*gamma_grid(l)+(1-p_eta_grid(m))*mean(log(delta1_grid(j)+gamma_grid(l))-log(delta1_grid(j)));
   beta_ID_LF = [beta_ID_LF; beta_temp];
   if delta1_grid(j)==max(delta1_grid)
       grid_alert(1) = 1;
   end
   if gamma_grid(l)==max(gamma_grid(l))
       grid_alert(2) = 1;
   end
end

%Store values of parameters satisfying Qhat_kappa = 0, Qhat_minuskappa = 0 (needed to find LRR)
if Qhat_kappa(j,m) == 0
G_p_grid_kappa = [G_p_grid_kappa; delta1_grid(j),gamma_grid(l),p_eta_grid(m)];
end

if Qhat_minuskappa(j,m) == 0
G_p_grid_minuskappa = [G_p_grid_minuskappa; delta1_grid(j),gamma_grid(l),p_eta_grid(m)];
end

    end
end

%Finding LRR (for each grid point of gamma_grid).
if isempty(G_p_grid_kappa)==0
    
%Store vectors for LRR
Q_LRR = zeros(1,size(G_p_grid_kappa,1));
    
for k = 1:size(G_p_grid_kappa,1) %for each gamma consistent with it
%Calculate objective function at parameters values in G_p_grid_kappa
Q_LRR(k) = mean(D)*(G_p_grid_kappa(k,1)+G_p_grid_kappa(k,2)-log(G_p_grid_kappa(k,1)+G_p_grid_kappa(k,2))).^2+(1-mean(D))*(G_p_grid_kappa(k,1)-log(G_p_grid_kappa(k,1))).^2;
end

%Admissible parameters for LRR computation (i.e. find where the values in Gamma_hat_minuskappa are within Gamma_hat_kappa)
[~,aux_find]= intersect(G_p_grid_kappa,G_p_grid_minuskappa,'rows');
Q_LRR_kU_minuskappa = Q_LRR(aux_find);

%For given set of parameters, which minimizes obj function is the LRR
LRR_minimizers = (Q_LRR_kU_minuskappa<=min(Q_LRR)+2*kappa_n);  %values of gamma.
Gamma_LRR_kU = G_p_grid_minuskappa(LRR_minimizers,:);

%Finding LRR CI: %which elements are both in Idset (LF) and in Gamma_LRR_kU?
LRR_set = intersect(CI_temp_LF,Gamma_LRR_kU,'rows'); 

%Computing parameter of interest (ATE) in LRR CI: for each set of parameters in the grid, find beta and store
for k = 1:size(LRR_set,1) 
beta_temp = LRR_set(k,3)*LRR_set(k,2)+(1-LRR_set(k,3))*mean(log(X'*LRR_set(k,1)+LRR_set(k,2))-log(X'*LRR_set(k,1)));
beta_LRR_LF = [beta_LRR_LF;beta_temp];
end

%Finding LRR CI: %which elements are both in Idset (RSW) and in Gamma_LRR_kU?
LRR_set = intersect(CI_temp_RSW,Gamma_LRR_kU,'rows'); 

%Computing parameter of interest (ATE) in LRR CI: for each set of parameters in the grid, find beta and store
for k = 1:size(LRR_set,1) 
beta_temp = LRR_set(k,3)*LRR_set(k,2)+(1-LRR_set(k,3))*mean(log(X'*LRR_set(k,1)+LRR_set(k,2))-log(X'*LRR_set(k,1)));
beta_LRR_RSW = [beta_LRR_RSW; beta_temp];
end

%If this gridpoint is not useful for LRR (empty Gamma_LRR_kU)
else
    
if size(G_p_grid_minuskappa,1)>0
    
    %LF case
LRR_set = intersect(CI_temp_LF,G_p_grid_minuskappa,'rows'); %Look at G_p_grid_minuskappa instead of Gamma_LRR_kU (which is empty)

for k = 1:size(LRR_set,1) 
beta_temp = LRR_set(k,3)*LRR_set(k,2)+(1-LRR_set(k,3))*mean(log(X'*LRR_set(k,1)+LRR_set(k,2))-log(X'*LRR_set(k,1)));
beta_LRR_LF = [beta_LRR_LF; beta_temp];
end

    %RSW case
LRR_set = intersect(CI_temp_RSW,G_p_grid_minuskappa,'rows'); %Look at G_p_grid_minuskappa instead of Gamma_LRR_kU (which is empty)

for k = 1:size(LRR_set,1) 
beta_temp = LRR_set(k,3)*LRR_set(k,2)+(1-LRR_set(k,3))*mean(log(X'*LRR_set(k,1)+LRR_set(k,2))-log(X'*LRR_set(k,1)));
beta_LRR_RSW = [beta_LRR_RSW; beta_temp];
end


end
end

end

%Store
CI_store_LF{r} = beta_ID_LF;
CI_store_RSW{r} = beta_ID_RSW;

CI_LRR_store_LF{r} = beta_LRR_LF;
CI_LRR_store_RSW{r} = beta_LRR_RSW;
end


%% Save Results

DiaryName = strcat('Simulations_n', num2str(n),'_R', num2str(R),'_kappa', num2str(kappa_n), '_B', num2str(B), '_beta', num2str(beta),'_gamma', num2str(gamma), '_1Z', num2str(Z1), '_2Z', num2str(Z2),'.mat');
save(DiaryName)


%% Constructing CI Length and Coverage Probability
% %pairs (beta, gamma) included in the confidence set for more than or equal to 95%, 75%, 50%

%Store results across simulations
Length_ID = zeros(R,2);
Length_LRR = zeros(R,2);
Coverage_ID = zeros(R,2);
Coverage_LRR = zeros(R,2);

for r = 1:R
    
%Compute length of CI for every simulation, r
Length_ID(r,1) = max(CI_store_LF{r}) - min(CI_store_LF{r});
Length_ID(r,2) = max(CI_store_RSW{r}) - min(CI_store_RSW{r});

Length_LRR(r,1) = max(CI_LRR_store_LF{r}) - min(CI_LRR_store_LF{r});
Length_LRR(r,2) = max(CI_LRR_store_RSW{r}) - min(CI_LRR_store_RSW{r});

%Was the true value in the set for every r?
Coverage_ID(r,1) = (min(CI_store_LF{r})<=beta & beta<= max(CI_store_LF{r}));
Coverage_ID(r,2) = (min(CI_store_RSW{r})<=beta & beta<= max(CI_store_RSW{r}));

Coverage_LRR(r,1) = (min(CI_LRR_store_LF{r})<=beta & beta<= max(CI_LRR_store_LF{r}));
Coverage_LRR(r,2) = (min(CI_LRR_store_RSW{r})<=beta & beta<= max(CI_LRR_store_RSW{r}));

end

%% Reported Results

%Coverage Probability
A = strcat('Coverage Probability, Specification number.',' ', num2str(Specification), ', n= ', num2str(n));
disp(A)
disp('Id Set: LF, RSW ')
[mean(Coverage_ID(:,1)),mean(Coverage_ID(:,2))]

disp('LRR: LF, RSW ')
[mean(Coverage_LRR(:,1)),mean(Coverage_LRR(:,2))]

%Confidence Interval Length
A = strcat('Average Confidence Length, Specification number.',' ', num2str(Specification), ', n= ', num2str(n));
disp(A)
disp('Id Set: LF, RSW')
[mean(Length_ID(:,1)),mean(Length_ID(:,2))]

disp('LRR: LF, RSW')
[mean(Length_LRR(:,1)),mean(Length_LRR(:,2))]
